Spread within variables



In [2]:

    
# Core libraries used by every cell below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# %pylab additionally star-imports numpy and matplotlib into the interactive
# namespace (see the "Populating the interactive namespace" message it prints).
# NOTE(review): no cell visible here appears to use a bare pylab name, so
# `%matplotlib inline` may be sufficient -- confirm against the rest of the notebook.
%pylab inline
# Display the installed pandas version as the cell output.
# NOTE(review): the cells below call DataFrame.sort, which pandas deprecated in
# 0.17 (in favour of sort_values) and removed in 0.20, so this notebook is tied
# to an old pandas release as written.
pd.__version__ # need 0.14.0 for multiindex slicing









    



Populating the interactive namespace from numpy and matplotlib






    Out[2]:





'0.14.1'

Read files



In [3]:

    
# Overall statistics: index by (K, M, STATISTIC), pivot STATISTIC into columns,
# then keep only the entry for (K, M) = (10, 200).
o = (
    pd.read_table("overall_statistics_ksmall.txt")
    .set_index(["K", "M", "STATISTIC"])["VALUE"]
    .unstack()
    .loc[(10, 200), :]
)

# Per-variable statistics: the two unstacks move VARIABLE and then STATISTIC
# into the columns; after selecting (K, M) = (10, 200) the final unstack
# yields one row per VARIABLE and one column per STATISTIC.
v = (
    pd.read_table("variable_statistics_ksmall.txt")
    .set_index(["K", "M", "STATISTIC", "VARIABLE"])["VALUE"]
    .unstack()
    .unstack()
    .loc[(10, 200), :]
    .unstack()
)



In [4]:

    
# Statistic columns selected from `v` in the sorted comparison tables below.
statistics_of_interest = [
    "rms_error",
    "max_error",
    "precisionbits",
    "srr",
    "correlation",
]

Load variable information



In [5]:

    
# Variable metadata, used to join descriptive columns (e.g. name, levels)
# onto the per-variable statistics. The INFO level is pivoted into columns
# and the "VALUE" block is selected, leaving one row per VARIABLE.
v_info = (
    pd.read_table("variable_information.txt")
    .set_index(["VARIABLE", "INFO"])
    .unstack()
    .loc[:, "VALUE"]
)

# Convert the "levels" column to integers.
v_info["levels"] = v_info["levels"].astype("int")

# Blank out the columns-axis name.
v_info.columns.name = ""

Sort by RMS error



In [8]:

    
# Five variables with the largest RMS error (ascending sort, so the tail holds
# the maxima), with the descriptive variable name joined on by VARIABLE index.
# NOTE(review): DataFrame.sort is the pandas 0.14-era API; pandas >= 0.17
# offers sort_values and later releases removed sort.
(
    v.sort("rms_error")[["rms_error", "max_error", "precisionbits", "srr"]]
    .join(v_info["name"])
    .tail(5)
)









    Out[8]:






  
    
      
      rms_error
      max_error
      precisionbits
      srr
      name
    
    
      VARIABLE
      
      
      
      
      
    
  
  
    
      FREQS
       0.002167
       0.051397
       3.28217
       6.26081
                Fractional occurance of snow
    
    
      FREQZM
       0.002256
       0.022684
       4.46221
       6.60402
       Fractional occurance of ZM convection
    
    
      PSL
       0.002299
       0.024323
       4.36156
       6.29992
                          Sea level pressure
    
    
      CLDMED
       0.002491
       0.030366
       4.04140
       6.07505
       Vertically-integrated mid-level cloud
    
    
      SSTSFMBL
       0.002599
       0.062668
       2.99611
       6.28976
                Mobilization flux at surface



In [ ]:

    
# Ten variables with the largest RMS error (tail of the ascending sort),
# restricted to the statistics of interest and joined with all variable info.
(
    v.sort("rms_error")[statistics_of_interest]
    .join(v_info)
    .tail(10)
)

Sort by maximum error



In [ ]:

    
# Ten variables with the smallest maximum error (head of the ascending sort),
# restricted to the statistics of interest and joined with all variable info.
(
    v.sort("max_error")[statistics_of_interest]
    .join(v_info)
    .head(10)
)



In [ ]:

    
# Ten variables with the largest maximum error (tail of the ascending sort),
# restricted to the statistics of interest and joined with all variable info.
(
    v.sort("max_error")[statistics_of_interest]
    .join(v_info)
    .tail(10)
)

Ratio maximum error / RMS error



In [ ]:

    
# Per-variable ratio of the maximum error to the RMS error.
# NOTE: this adds the column to `v` itself, so every later use of `v`
# carries an extra "error_ratio" column.
v["error_ratio"] = v.max_error / v.rms_error
# Select the error_ratio column through a column-axis .loc and sort it by value.
# NOTE(review): presumably this yields a one-column DataFrame (the subsequent
# .sort("error_ratio") takes a column name) -- confirm on the target pandas
# version; DataFrame.sort itself only exists in old pandas releases.
er = v.loc(axis=1)[("error_ratio",)].sort("error_ratio")
# Show the mean, the median, and the first/last five rows of the sorted ratios
# together as a single tuple output.
(er.mean(), er.median(),er.head(5),er.tail(5))



In [ ]:

    
# Maximum and RMS error for four selected variables, transposed so that the
# statistics become rows and the variables become columns.
(
    v.loc[
        ["U", "FSDSC", "Z3", "CCN3"],
        ["max_error", "rms_error"],
    ]
    .transpose()
)

	rms_error	max_error	precisionbits	srr	name
VARIABLE
FREQS	0.002167	0.051397	3.28217	6.26081	Fractional occurance of snow
FREQZM	0.002256	0.022684	4.46221	6.60402	Fractional occurance of ZM convection
PSL	0.002299	0.024323	4.36156	6.29992	Sea level pressure
CLDMED	0.002491	0.030366	4.04140	6.07505	Vertically-integrated mid-level cloud
SSTSFMBL	0.002599	0.062668	2.99611	6.28976	Mobilization flux at surface